Assumptions

  • winback subscribers were members, then began a trial and became members again
  • visitor token matches ip or device address
  • winbacks are using different email/phone numbers

Importing Data

artists = read.csv("Artists.csv")
events = read.csv("Events.csv")
linkartists = read.csv("LinkArtists.csv")

Data Processing

# converting data into propper format
events = events %>% mutate(Timestamp = ymd_hms(events$Timestamp),
                           Year = year(Timestamp),
                           Month = month(Timestamp),
                           Day = day(Timestamp),
                           Hour = hour(Timestamp),
                           Quarter = quarter(Timestamp)
                           )

# modifying column names to match 
colnames(linkartists)[which(names(linkartists)=="X.ArtistId")] ="ArtistId"
colnames(events)[which(names(events)=="Linkid")] ="LinkId"

#combining tables
linkartists = linkartists %>% left_join(artists)
## Joining, by = "ArtistId"
#switching LinkId to lowercase to match events data
linkartists = linkartists %>% mutate(LinkId = tolower(LinkId))  

#joining linkartists to match LinkId with Artist
events = events %>% left_join(linkartists) 
## Joining, by = "LinkId"
## Warning: Column `LinkId` joining factor and character vector, coercing into
## character vector

Data Exploration

Looking at most frequent or most popular values in each field

# Summary of browsers used in descending order
sort(summary(events$Browser),decreasing=T) 
##           Instagram App                  Safari            Facebook App 
##                    8117                    7031                    1822 
##                  Chrome Mobile Safari UIWebView                Line App 
##                     773                     546                     327 
##              Google App            Snapchat App                 Firefox 
##                     182                     165                     143 
##                   Weibo             Twitter App      Facebook Messenger 
##                     106                      86                      62 
##                    Edge         Samsung Browser    Generic Ruby Crawler 
##                      23                      16                       7 
##              UC Browser              WeChat App         Firefox for iOS 
##                       4                       4                       3 
##                   Opera          Yandex Browser              Apple Mail 
##                       3                       3                       2 
##         Coc Coc Browser                  Puffin           Whale Browser 
##                       2                       2                       2 
##         Android WebView              CM Browser                    Iron 
##                       1                       1                       1 
##            Mint Browser              Opera Mini 
##                       1                       1
# Top 5 Cities
head(sort(summary(events$City),decreasing=T))
## (Other) Unknown   Lagos  Taipei   Tokyo   Osaka 
##   11432    1847     441     259     215     193
# Top 5 countries
head(sort(summary(events$Countrycode),decreasing=T))
##   US   JP   GB   RU   FR   CA 
## 5621 4603  819  665  596  592
# Table of Member Types 
sort(summary(events$Convtype),decreasing=T)
##      trial membership 
##      11189       8247
# Table of Member Plans
sort(summary(events$Convname),decreasing=T)
## Individual plan    Student plan     Family plan 
##           17686            1165             585
# Table of Devices
sort(summary(events$Device),decreasing=T)
##        mobile       desktop        tablet  Mobile Phone Mobile Device 
##         17839           956           287           251            79 
##       Desktop       crawler        Tablet 
##            13             7             4

Finding New Subscribers and Winback Customers

Method I wanted to looking at Linkfire visitors who had visited at least twice in order to see whether or not they were new subscribers or winback customers.

I assumed that visitors who were once on trial and then began memberships became new subscribers.

For Winback customers, I looked for users who were members before going on a trial period and then back to being members. I also counted the number of times a single user resubscribed to their streaming service if they switched between trial and membership services multiple times.

repeated_id = events$Visitortoken[duplicated(events$Visitortoken)]

# All users that have visited Linkfire more than once
repeated_id = unique(repeated_id) 

events_repeat = events[events$Visitortoken %in% repeated_id,]

# Most frequent users
head(sort(summary(events$Visitortoken),decreasing = T)) 
##                          (Other) c4c24e5ead78d4a3a7edxxxxxxxxxxxx 
##                            18449                               46 
## 1c0445de0b6266964690xxxxxxxxxxxx df30982ac360fad69fcaxxxxxxxxxxxx 
##                               26                               22 
## a50bd11b4c71fb0a5e32xxxxxxxxxxxx 472c7c3fecfc63152692xxxxxxxxxxxx 
##                               17                               16
events_repeat = events_repeat[order(events_repeat$Visitortoken,events_repeat$Timestamp),]

events_repeat$new_sub = rep(0, nrow(events_repeat))
events_repeat$win_back = rep(0,nrow(events_repeat))

new_id = 0
lost = 0
was_member = 0
on_trial = 0

for(i in 1:nrow(events_repeat)) {
  # Checks if looking at new user
  if(new_id == 1 && events_repeat$Visitortoken[i-1]!=events_repeat$Visitortoken[i]) {
    new_id = 0
    lost = 0
    was_member = 0
    on_trial = 0
  }
  # Assigns values if this is a user's first visit
  if(new_id == 0) { 
    new_id=1
    if(events_repeat$Convtype[i]=="trial") {on_trial=1; 
    } else if(events_repeat$Convtype[i]=="membership") {was_member=1; }
    next
  } 
  # Checks to see if user becomes new subscriber or winback
  if(new_id == 1) { 
    if(events_repeat$Convtype[i]=="trial" && on_trial==1 && lost == 0) { 
    } else if(events_repeat$Convtype[i] == "membership" && was_member == 1 && lost == 0) { on_trial = 0; was_member=1;
    } else if(events_repeat$Convtype[i] == "trial" && was_member == 1) {lost=1; on_trial= 0;
    } else if(events_repeat$Convtype[i] == "membership" && lost == 1 && on_trial==0 && was_member==1) {events_repeat$win_back[i]=1; lost=0;
    } else if(events_repeat$Convtype[i] == "membership" && on_trial==1 && lost == 0) {events_repeat$new_sub[i]=1;on_trial=0;was_member=1;}
  }
}

Number of New Subscribers and Winbacks

# Number new subscribed members
sum(events_repeat$new_sub)
## [1] 2480
# Instances of customers returning to their subscriptions   
sum(events_repeat$win_back) 
## [1] 8

Ratio of New Subscribers and Winbacks to Total Amount of Linkfire Visitors

# Number of unique visitors total
length(unique(events$Visitortoken)) 
## [1] 14507
# 17% of Linkfire visitors who become new subscribers
2480/14507 * 100 
## [1] 17.0952
# .05% of Linkfire visitors who resubscribe 
8/14507 * 100 
## [1] 0.05514579

Total Number of Unique Linkfire visitors in 2019 and 2020

# Number of unique linkfire visitors in 2019
length(unique(events$Visitortoken[which(events$Year==2019,)]))
## [1] 6382
# Number of unique linkfire visitors in 2020
length(unique(events$Visitortoken[which(events$Year==2020,)]))
## [1] 9811
# Percent change in Linkfire visitors from 2019 to 2020
((9811-6382)/6382)*100
## [1] 53.72924

Tables

Comparing different fields by the percentage of new subscribers or Winbacks

# City with highest percentage of new subscribers 
city_new_sub = aggregate(events_repeat$new_sub/2480*100~events_repeat$City, data = events_repeat, FUN = sum)
city_new_sub = city_new_sub %>% arrange(desc(city_new_sub[,2]))
head(city_new_sub)
##   events_repeat$City events_repeat$new_sub/2480 * 100
## 1            Unknown                       11.8548387
## 2             Taipei                        2.1370968
## 3             Moscow                        1.3709677
## 4              Lagos                        1.2096774
## 5              Tokyo                        1.0483871
## 6              Osaka                        0.9677419
# City with highest percentage of winbacks
city_win_back = aggregate(events_repeat$win_back/8*100~events_repeat$City, data = events_repeat, FUN = sum)
city_win_back = city_win_back %>% arrange(desc(city_win_back[,2]))
head(city_win_back)
##   events_repeat$City events_repeat$win_back/8 * 100
## 1         Bridgeport                           50.0
## 2           Roppongi                           25.0
## 3       East Hanover                           12.5
## 4             Moscow                           12.5
## 5                                               0.0
## 6           Aabenraa                            0.0
# Countries with highest percentage of new subscribers
country_new_sub = aggregate(events_repeat$new_sub/2480*100~events_repeat$Countrycode, data = events_repeat, FUN = sum)
country_new_sub = country_new_sub %>% arrange(desc(country_new_sub[,2]))
head(country_new_sub) 
##   events_repeat$Countrycode events_repeat$new_sub/2480 * 100
## 1                        JP                        28.387097
## 2                        US                        27.459677
## 3                        RU                         4.475806
## 4                        GB                         4.435484
## 5                        TW                         3.548387
## 6                        FR                         3.508065
# Countries with highest percentage of winbacks
country_win_back = aggregate(events_repeat$win_back/8*100~events_repeat$Countrycode, data = events_repeat, FUN = sum)
country_win_back = country_win_back %>% arrange(desc(country_win_back[,2]))
head(country_win_back) 
##   events_repeat$Countrycode events_repeat$win_back/8 * 100
## 1                        US                           62.5
## 2                        JP                           25.0
## 3                        RU                           12.5
## 4                        AE                            0.0
## 5                        AR                            0.0
## 6                        AT                            0.0
# Subscription Plans and percentage of new subscribers
plan_new_sub = aggregate(events_repeat$new_sub/2480*100~events_repeat$Convname,FUN = sum)
plan_new_sub = plan_new_sub %>% arrange(desc(plan_new_sub[,2]))
head(plan_new_sub) 
##   events_repeat$Convname events_repeat$new_sub/2480 * 100
## 1        Individual plan                        88.104839
## 2           Student plan                         8.024194
## 3            Family plan                         3.870968
# Subscription Plans and percentage of winbacks
plan_win_back = aggregate(events_repeat$win_back/8*100~events_repeat$Convname,FUN = sum)
plan_win_back = plan_win_back %>% arrange(desc(plan_win_back[,2]))
head(plan_win_back)
##   events_repeat$Convname events_repeat$win_back/8 * 100
## 1        Individual plan                            100
## 2            Family plan                              0
## 3           Student plan                              0
# Devices and percentage of new customers
device_new_sub = aggregate(events_repeat$new_sub/2480*100~events_repeat$Device,FUN = sum)
device_new_sub = device_new_sub %>% arrange(desc(device_new_sub[,2]))
device_new_sub 
##   events_repeat$Device events_repeat$new_sub/2480 * 100
## 1               mobile                      91.81451613
## 2              desktop                       5.04032258
## 3               tablet                       1.77419355
## 4        Mobile Device                       1.20967742
## 5         Mobile Phone                       0.08064516
## 6              crawler                       0.04032258
## 7              Desktop                       0.04032258
## 8               Tablet                       0.00000000
# Devices and percentage of winback
device_win_back = aggregate(events_repeat$win_back/8*100~events_repeat$Device,FUN = sum)
device_win_back = device_win_back %>% arrange(desc(device_win_back[,2]))
device_win_back 
##   events_repeat$Device events_repeat$win_back/8 * 100
## 1              desktop                           87.5
## 2               mobile                           12.5
## 3              crawler                            0.0
## 4              Desktop                            0.0
## 5        Mobile Device                            0.0
## 6         Mobile Phone                            0.0
## 7               tablet                            0.0
## 8               Tablet                            0.0
# Years and percentage of new subscribers
year_new_sub = aggregate(events_repeat$new_sub/2480*100~events_repeat$Year,FUN = sum)
year_new_sub = year_new_sub %>% arrange(desc(year_new_sub[,2]))
year_new_sub 
##   events_repeat$Year events_repeat$new_sub/2480 * 100
## 1               2020                         78.10484
## 2               2019                         21.89516
# Years and percentage of winback
year_win_back = aggregate(events_repeat$win_back/8*100~events_repeat$Year,FUN = sum)
year_win_back = year_win_back %>% arrange(desc(year_win_back[,2]))
year_win_back 
##   events_repeat$Year events_repeat$win_back/8 * 100
## 1               2020                             75
## 2               2019                             25
# Months and percentage of new subscribers
month_new_sub = aggregate(events_repeat$new_sub/2480*100~events_repeat$Month,FUN = sum)
month_new_sub = month_new_sub %>% arrange((month_new_sub[,1]))
month_new_sub
##   events_repeat$Month events_repeat$new_sub/2480 * 100
## 1                   1                      26.97580645
## 2                   2                      15.20161290
## 3                   3                      26.00806452
## 4                   4                       9.91935484
## 5                   9                       0.08064516
## 6                  10                       2.09677419
## 7                  11                       0.28225806
## 8                  12                      19.43548387
# Months and percentage of winback
month_win_back = aggregate(events_repeat$win_back/8*100~events_repeat$Month,FUN = sum)
month_win_back = month_win_back %>% arrange((month_win_back[,1]))
month_win_back 
##   events_repeat$Month events_repeat$win_back/8 * 100
## 1                   1                           37.5
## 2                   2                           12.5
## 3                   3                           12.5
## 4                   4                           12.5
## 5                   9                            0.0
## 6                  10                            0.0
## 7                  11                            0.0
## 8                  12                           25.0
# Artists with the highest percentage of new subscribers
artist_new_sub = aggregate(events_repeat$new_sub/2480*100~events_repeat$Name,FUN = sum)
artist_new_sub = artist_new_sub %>% arrange(desc(artist_new_sub[,2]))
artist_new_sub[1:10,]
##                           events_repeat$Name events_repeat$new_sub/2480 * 100
## 1                               Jackson Wang                        3.5080645
## 2                       Sunday Service Choir                        2.5806452
## 3  yui (FLOWER FLOWER) × ミゾベリョウ (odol)                        0.9274194
## 4                            Various Artists                        0.8467742
## 5                                     MEZZO”                        0.7661290
## 6                                   88rising                        0.7258065
## 7                                 Rich Brian                        0.7258065
## 8                                   Yoga Lin                        0.7258065
## 9                             Phương Anh Đào                        0.6854839
## 10                                      Tayc                        0.6854839
# Artists with the highest percentage of winback
artist_win_back = aggregate(events_repeat$win_back/8*100~events_repeat$Name,FUN = sum)
artist_win_back = artist_win_back %>% arrange(desc(artist_win_back[,2]))
artist_win_back[1:10,] 
##      events_repeat$Name events_repeat$win_back/8 * 100
## 1           Eye on Eyez                           50.0
## 2                Camilo                           12.5
## 3  Sunday Service Choir                           12.5
## 4                                                  0.0
## 5                   .mt                            0.0
## 6          [Alexandros]                            0.0
## 7    04 Limited Sazabys                            0.0
## 8               1070725                            0.0
## 9                 2Baba                            0.0
## 10                2live                            0.0

Percent Change in New Subscribers and Winbacks Who Used Linkfire From 2019 to 2020

year_new_sub = aggregate(events_repeat$new_sub~events_repeat$Year,FUN = sum)

# Percentage change in number of new subscribers who used Linkfire from 2019 to 2020
(1937-543)/543*100
## [1] 256.7219
year_win_back = aggregate(events_repeat$win_back~events_repeat$Year,FUN = sum)
# Percentage change in number of winbacks who used Linkfire from 2019 to 2020
(6-2)/2*100
## [1] 200

Interactive Visualizations

(hover over the bars to reveal more info)

g = ggplot(device_new_sub, aes(x=device_new_sub[,1],y=device_new_sub[,2])) + geom_bar(stat="identity") + xlab("Devices") + ylab("Number of New Subscribers") +ggtitle("Devices used by New Subscribers") 
ggplotly(g) # people who are new subscribers are on phones
g = ggplot(device_win_back, aes(x=device_win_back[,1],y=device_win_back[,2])) + geom_bar(stat="identity") + xlab("Devices") + ylab("Number of Winbacks") +ggtitle("Devices used by Winbacks")
ggplotly(g) # people who are won back are on desktops
g = ggplot(month_new_sub, aes(x=month_new_sub[,1],y=month_new_sub[,2])) + geom_bar(stat="identity") + xlab("Months") + ylab("Number of New Subscribers") +ggtitle("Monthly count of New Subscribers")
ggplotly(g) # there is an increase 
g = ggplot(month_win_back, aes(x=month_win_back[,1],y=month_win_back[,2])) + geom_bar(stat="identity") + xlab("Months") + ylab("Number of Winbacks") +ggtitle("Monthly count of Winbacks")
ggplotly(g)
g = ggplot(year_new_sub, aes(x=year_new_sub[,1],y=year_new_sub[,2])) + geom_bar(stat="identity") + xlab("Year") + ylab("Number of New Subscribers") +ggtitle("Yearly count of New Subscribers")
ggplotly(g)
g = ggplot(year_win_back, aes(x=year_win_back[,1],y=year_win_back[,2])) + geom_bar(stat="identity") + xlab("Year") + ylab("Number of Winbacks") +ggtitle("Yearly count of Winbacks")
ggplotly(g)
g = ggplot(artist_new_sub[1:10,], aes(x=artist_new_sub[1:10,1],y=artist_new_sub[1:10,2])) + geom_bar(stat="identity") + xlab("Artists") + ylab("Number of New Subscribers") +ggtitle("Artists with most New Subscribers")
ggplotly(g)
g = ggplot(artist_win_back[1:3,], aes(x=artist_win_back[1:3,1],y=artist_win_back[1:3,2])) + geom_bar(stat="identity") + xlab("Artists") + ylab("Number of Winbacks") +ggtitle("Artists with most Winbacks")
ggplotly(g)